Read and wrangle in data

output_12 <- read.csv(here("data","bivalve_model","LookUpTable_201214.csv"))
output_14 <- read.csv(here("data","bivalve_model","LookUpTable_201416.csv"))

Merging outputs of 2012 and 2014 outputs

combined <- base::merge(output_12, output_14, by = c("site", "long", "lat"), all = TRUE) %>% 
  mutate(yield_12 = yield.x,
         yield_14 = yield.y
         ) %>% 
  dplyr::select("site", "long", "lat", "yield_12", "yield_14")

T testing the two yield groups

Is there a signififcant difference between the (paired) yield values of the 2012 and 2014 versions of the model?

## converting data frames to be paired vectors## 

combined_nocopies <- base::merge(output_12, output_14, by = c("site", "long", "lat"), all = FALSE) %>% 
  mutate(yield_12 = yield.x,
         yield_14 = yield.y
         ) %>% 
  dplyr::select("site", "long", "lat", "yield_12", "yield_14")

vec12 <- combined_nocopies %>% 
  pull(yield_12)

vec14 <- combined_nocopies %>% 
  pull(yield_14)
## conducting t test ##

t.test(x = vec14, y = vec12, paired = TRUE)
## 
##  Paired t-test
## 
## data:  vec14 and vec12
## t = -21.139, df = 4442, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -448882.6 -372689.2
## sample estimates:
## mean difference 
##       -410785.9
############## OUTPUT ###################
#   Paired t-test
# 
# data:  vec14 and vec12
# t = -21.139, df = 4442, p-value < 2.2e-16
# alternative hypothesis: true mean difference is not equal to 0
# 95 percent confidence interval:
#  -448882.6 -372689.2
# sample estimates:
# mean difference 
#       -410785.9 

Yes, these outputs are different.

Calculating the mean and sd values for each year below:

mean(vec12)
## [1] 5985567
sd(vec12)
## [1] 1541905
mean(vec14)
## [1] 5574781
sd(vec14)
## [1] 2260238

Initial visualizations of model outputs

combined_long <- combined %>%
  pivot_longer(yield_12:yield_14,
               names_to = "year",
               names_prefix = "yield_",
               values_to = "yield")

## yield by latitude
ggplot(data = combined_long, aes(x = lat, y = yield)) +
  geom_line(aes(color = year)) +
  theme_minimal()

##Yikes this output is messy##

## histogram (shows left skew for both but more dramatic left skew for 2014-16 time frames)
ggplot(data = combined_long, aes(x = yield, fill = year)) +
  geom_histogram() +
  xlim(c(2300000, 11800000)) +
  theme_minimal()

##boxplot and violin plot
ggplot(data = combined_long, aes(x = year, y = yield, fill = year)) +
  geom_boxplot() +
  geom_jitter(color="black", size=0.4, alpha=0.9) +
  theme(
      legend.position="none",
      plot.title = element_text(size=11)
    ) +
  theme_minimal()

ggplot(data = combined_long, aes(x = year, y = yield, fill = year)) +
  geom_violin() +
  #geom_jitter(color="black", size=0.4, alpha=0.9) +
  theme(
      legend.position="none",
      plot.title = element_text(size=11)
    ) +
  theme_minimal()

Working with SST data and bivalve yields

Bringing in the SST data to look at if this could be a driver for the difference in model yields across years. This is likely due to the blob, but POC could also be a factor.

To avoid having to rerun the code in netCDFfile_testing.Rmd and netCDFfile_newbivalvedata.Rmd to wrangle the SST data, I’ve written those matricies as csvs and will read them in here to work with them.

# write.csv(matrix_sst_2012, here("data", "matrix_sst_2012.csv"))
# write.csv(matrix_sst_2014, here("data", "matrix_sst_2014.csv"))

matrix_sst_2012 <- read.csv(here("data", "matrix_sst_2012.csv"))
matrix_sst_2014 <- read.csv(here("data", "matrix_sst_2014.csv"))
#need to reformat the SST matricies

sst_2012 <- matrix_sst_2012 %>% 
  pivot_longer(MODIS.20121001:MODIS.20140301,
                         names_to = "month",
                         names_prefix = "MODIS.",
                         values_to = "SST") %>%
  group_by(sites_lon, sites_lat) %>% 
  summarize(sst_ave = mean(SST)) %>% 
  mutate(lat = sites_lat,
         long = sites_lon,
         sst_ave12 = sst_ave) %>% 
  ungroup() %>% 
  dplyr::select("long", "lat", "sst_ave12")

sst_2014 <- matrix_sst_2014 %>% 
  pivot_longer(MODIS.20141001:MODIS.20160301,
                         names_to = "month",
                         names_prefix = "MODIS.",
                         values_to = "SST") %>%
  group_by(sites_lon, sites_lat) %>% 
  summarize(sst_ave = mean(SST)) %>% 
  mutate(lat = sites_lat,
         long = sites_lon,
         sst_ave14 = sst_ave) %>% 
  ungroup() %>% 
  dplyr::select("long", "lat", "sst_ave14")


all_sst <- base::merge(sst_2012, sst_2014, by = c("long", "lat"), all = TRUE) %>% 
  pivot_longer(sst_ave12:sst_ave14,
               names_to = "year",
               names_prefix = "sst_ave",
               values_to = "sst_ave")


all_sst_yield <- base::merge (all_sst, combined_long, by = c("lat", "long", "year"), all = TRUE) %>%
  dplyr::select("site", "long", "lat", "year", "sst_ave", "yield")

Yield and SST visualizations

ggplot(data = all_sst_yield, aes(x = sst_ave, y = yield)) +
  geom_point(aes(color = year)) +
  theme_minimal()

## sst only ##
ggplot(data = all_sst_yield, aes(x = year, y = sst_ave, fill = year)) +
  geom_violin() +
  #geom_jitter(color="black", size=0.4, alpha=0.9) +
  theme(
      legend.position="none",
      plot.title = element_text(size=11)
    ) +
  theme_minimal()

#ggplot_build(p)$data

SST versus yield for 2012 and 2014 models

all_12 <-  all_sst_yield %>% 
  filter(year == "12")

ggplot(data = all_12, aes(x = sst_ave, y = yield)) +
  geom_point(color = "#F8766D") +
  xlim(c(10,21)) +
  ylim(c(2500000, 11580000)) +
  theme_minimal()

all_14 <- all_sst_yield %>% 
  filter(year == "14")

ggplot(data = all_14, aes(x = sst_ave, y = yield)) +
  geom_point(color = "#00BFC4") +
  xlim(c(10,21)) +
    ylim(c(2500000, 11580000)) +
  theme_minimal()

Working with POC data and bivalve yields

Similar to the SST work above, bringing in the POC data to look at if this could be a driver for the difference in model yields across years.

To avoid having to rerun the code in netCDFfile_testing.Rmd and netCDFfile_newbivalvedata.Rmd to wrangle the POC data, I’ve written those matricies as csvs and will read them in here to work with them.

# write.csv(matrix_poc_2012, here("data", "matrix_poc_2012.csv"))
# write.csv(matrix_poc_2014, here("data", "matrix_poc_2014.csv"))

matrix_poc_2012 <- read.csv(here("data", "matrix_poc_2012.csv"))
matrix_poc_2014 <- read.csv(here("data", "matrix_poc_2014.csv"))
#need to reformat the POC matricies

poc_2012 <- matrix_poc_2012 %>% 
  pivot_longer(MODIS.20121001:MODIS.20140301,
                         names_to = "month",
                         names_prefix = "MODIS.",
                         values_to = "POC") %>%
  group_by(sites_lon, sites_lat) %>% 
  summarize(poc_ave = mean(POC)) %>% 
  mutate(lat = sites_lat,
         long = sites_lon,
         poc_ave12 = poc_ave) %>% 
  ungroup() %>% 
  dplyr::select("long", "lat", "poc_ave12")

poc_2014 <- matrix_poc_2014 %>% 
  pivot_longer(MODIS.20141001:MODIS.20160301,
                         names_to = "month",
                         names_prefix = "MODIS.",
                         values_to = "POC") %>%
  group_by(sites_lon, sites_lat) %>% 
  summarize(poc_ave = mean(POC)) %>% 
  mutate(lat = sites_lat,
         long = sites_lon,
         poc_ave14 = poc_ave) %>% 
  ungroup() %>% 
  dplyr::select("long", "lat", "poc_ave14")


all_poc <- base::merge(poc_2012, poc_2014, by = c("long", "lat"), all = TRUE) %>% 
  pivot_longer(poc_ave12:poc_ave14,
               names_to = "year",
               names_prefix = "poc_ave",
               values_to = "poc_ave")


all_poc_yield <- base::merge (all_poc, combined_long, by = c("lat", "long", "year"), all = TRUE) %>%
  dplyr::select("site", "long", "lat", "year", "poc_ave", "yield")

Yield and POC visualizations

ggplot(data = all_poc_yield, aes(x = poc_ave, y = yield)) +
  geom_point(aes(color = year)) +
  theme_minimal()

## poc only ##
ggplot(data = all_poc_yield, aes(x = year, y = poc_ave, fill = year)) +
  geom_violin() +
  #geom_jitter(color="black", size=0.4, alpha=0.9) +
  theme(
      legend.position="none",
      plot.title = element_text(size=11)
    ) +
  theme_minimal()

#ggplot_build(p)$data

POC versus yield for 2012 and 2014 models

all_12 <-  all_poc_yield %>% 
  filter(year == "12")

ggplot(data = all_12, aes(x = poc_ave, y = yield)) +
  geom_point(color = "#F8766D") +
  xlim(c(77,2000)) +
  ylim(c(2500000, 11580000)) +
  theme_minimal()

all_14 <- all_poc_yield %>% 
  filter(year == "14")

ggplot(data = all_14, aes(x = poc_ave, y = yield)) +
  geom_point(color = "#00BFC4") +
  xlim(c(77,2000)) +
    ylim(c(2500000, 11580000)) +
  theme_minimal()

POC vs SST

all_poc_sst_yield <- base::merge (all_poc_yield, all_sst_yield, by = c("site","lat", "long", "year", "yield"), all = TRUE)
ggplot(data = all_poc_sst_yield, aes(x = sst_ave, y = poc_ave)) +
  geom_point(aes(color = year)) +
  geom_smooth(method = lm, color = "black", aes(linetype = year), se = FALSE) +
  theme_minimal()